
 //-----------------------------------------------------------------------------
//
//Copyright(c) 2020, ThorsianWay Technologies Co, Ltd
//All rights reserved.
//
//IP Name       :   pixel_shader
//File Name     :   pixel_out.v
//Module name   :   pixel_out
//Full name     :   texel bilinear interpolation
//
//Author        :   zha daolu
//Email         :   
//Date          :   2020/5/13
//Version       :   V1.00
//
//Abstract      :   
//                  
//Called  by    :   GPU
//
//Modification history
//-----------------------------------------------------
//1.00: initial version 
//
//-----------------------------------------------------------------------------  
//-----------------------------------------------------------------------------
// pixel_out
//
// Weighted blend of four 32-bit texels, laid out per texel as
//   [31:24] = A, [23:16] = R, [15:8] = G, [7:0] = B
// (lane mapping taken from the multiplier hookup below).
//
// Pipeline (every register holds its value while busy_in is high):
//   stage 1 : each 8-bit lane of texel_32_N is multiplied by paramN_in and
//             the 16-bit product is registered; texel_32_0 and the valid /
//             param-enable flags are registered alongside.
//   stage 2 : the four products of each channel are summed into a 16-bit
//             accumulator; texel 0 and the flags are delayed once more.
//   stage 3 : combinational select -- when the delayed param_en_in is set the
//             upper byte of each sum is output, otherwise the delayed texel 0
//             is passed through unfiltered.
//
// Valid flags: texel_32_en is delayed by two clocks and presented on
// alpha_out_en for the alpha / mask texture types, on pixel_out_en otherwise.
//
// Side-band data (x/y/z, fog, r/g/b/a "_in") is forwarded combinationally to
// the matching "_out" ports; xy_fifo_rd is raised while an output valid flag
// is high and busy_in is low.
//
// pixel_gen_end is a single-cycle pulse generated once uv_end has been seen
// and xy_fifo_empty is high.
//
// NOTE(review): mult_8x8 is defined outside this file; it is presumably a
// combinational unsigned 8x8 -> 16-bit multiplier -- confirm.
// NOTE(review): the stage-2 sums are truncated to 16 bits; this assumes the
// four weights are chosen so the total never exceeds 16'hFFFF -- confirm
// against the block that generates param1_in..param4_in.
//-----------------------------------------------------------------------------
module pixel_out(
    rst_n                     ,
    clk                       ,
    busy_in                   ,
    tex_type                  ,
    xy_fifo_empty             ,
    uv_end                    ,
    x_in                      ,
    y_in                      ,
    z_in                      ,
    fog_x_in                  ,
    fog_y_in                  ,
    fog_z_in                  ,
    r_in                      ,
    g_in                      ,
    b_in                      ,
    a_in                      ,
    xy_fifo_rd                ,
    param_en_in               ,
    param1_in                 ,
    param2_in                 ,
    param3_in                 ,
    param4_in                 ,
    param_fifo_rd             ,
    texel_32_0                ,
    texel_32_1                ,
    texel_32_2                ,
    texel_32_3                ,
    texel_32_en               ,
    r_out_gen                 ,
    g_out_gen                 ,
    b_out_gen                 ,
    a_out_gen                 ,
    x_out                     ,
    y_out                     ,
    z_out                     ,
    fog_x_out                 ,
    fog_y_out                 ,
    fog_z_out                 ,
    r_out                     ,
    g_out                     ,
    b_out                     ,
    a_out                     ,
    pixel_out_en              ,
    alpha_out_en              ,
    busy_out                  ,
    pixel_gen_end
);

// Texture type encodings compared against tex_type.
localparam SIM_DXT1      = 4'd0 ;
localparam SIM_DXT3      = 4'd1 ;
localparam SIM_ALPHA8    = 4'd10;
localparam SIM_ALPHA1    = 4'd2 ;
localparam SIM_ALPHA2    = 4'd9 ;
localparam SIM_ALPHA4    = 4'd11;
localparam SIM_PURECOLOR = 4'd3 ;
localparam SIM_RGB565    = 4'd4 ;
localparam SIM_RGBA8888  = 4'd5 ;
localparam SIM_Y8        = 4'd6 ;
localparam SIM_YUV422    = 4'd7 ;
localparam SIM_PUREMASK  = 4'd8 ;
localparam SIM_RGBA1555  = 4'd12;
localparam SIM_RGBA4444  = 4'd13;

//-----------------------------------------------------------------------------
// Ports
//-----------------------------------------------------------------------------
input             rst_n         ;   // asynchronous reset, active low
input             clk           ;
input             busy_in       ;   // stall: freezes the pipeline and FIFO reads
input      [3:0]  tex_type      ;   // one of the SIM_* encodings
input             xy_fifo_empty ;
input             uv_end        ;
input      [31:0] x_in          ;
input      [31:0] y_in          ;
input      [31:0] z_in          ;
input      [31:0] fog_x_in      ;
input      [31:0] fog_y_in      ;
input      [31:0] fog_z_in      ;
input      [7:0]  r_in          ;
input      [7:0]  g_in          ;
input      [7:0]  b_in          ;
input      [7:0]  a_in          ;
output reg        xy_fifo_rd    ;
input             param_en_in   ;   // 1: weighted blend, 0: pass texel 0 through
input      [7:0]  param1_in     ;   // weight applied to texel_32_0
input      [7:0]  param2_in     ;   // weight applied to texel_32_1
input      [7:0]  param3_in     ;   // weight applied to texel_32_2
input      [7:0]  param4_in     ;   // weight applied to texel_32_3
output reg        param_fifo_rd ;
input      [31:0] texel_32_0    ;
input      [31:0] texel_32_1    ;
input      [31:0] texel_32_2    ;
input      [31:0] texel_32_3    ;
input             texel_32_en   ;   // texel inputs valid
output reg [7:0]  r_out_gen     ;
output reg [7:0]  g_out_gen     ;
output reg [7:0]  b_out_gen     ;
output reg [7:0]  a_out_gen     ;
output reg [31:0] x_out         ;
output reg [31:0] y_out         ;
output reg [31:0] z_out         ;
output reg [31:0] fog_x_out     ;
output reg [31:0] fog_y_out     ;
output reg [31:0] fog_z_out     ;
output reg [7:0]  r_out         ;
output reg [7:0]  g_out         ;
output reg [7:0]  b_out         ;
output reg [7:0]  a_out         ;
output reg        pixel_out_en  ;
output reg        alpha_out_en  ;
output reg        pixel_gen_end ;
output            busy_out      ;

//-----------------------------------------------------------------------------
// Internal signals
//-----------------------------------------------------------------------------
// stage-2 channel accumulators
reg [15:0] r_sum;
reg [15:0] g_sum;
reg [15:0] b_sum;
reg [15:0] a_sum;

// valid flags after the first pipeline stage
reg pixel_out_en_pre;
reg alpha_out_en_pre;

// texel 0 / valid delay line used by the bypass path
reg [31:0] texel_32_ff1;
reg        texel_32_en_ff1;
reg [31:0] texel_32_ff2;

// remembers that uv_end was seen until pixel_gen_end fires
reg uv_end_ff1;

// param_en_in delayed to line up with the stage-2 sums
reg param_en_in_ff1;
reg param_en_in_ff2;

// color1 x parameter1
wire [15:0] a1_x_p1;
wire [15:0] r1_x_p1;
wire [15:0] g1_x_p1;
wire [15:0] b1_x_p1;
// color2 x parameter2
wire [15:0] a2_x_p2;
wire [15:0] r2_x_p2;
wire [15:0] g2_x_p2;
wire [15:0] b2_x_p2;
// color3 x parameter3
wire [15:0] a3_x_p3;
wire [15:0] r3_x_p3;
wire [15:0] g3_x_p3;
wire [15:0] b3_x_p3;
// color4 x parameter4
wire [15:0] a4_x_p4;
wire [15:0] r4_x_p4;
wire [15:0] g4_x_p4;
wire [15:0] b4_x_p4;

// registered products
reg [15:0] a1_x_p1_ff1;
reg [15:0] r1_x_p1_ff1;
reg [15:0] g1_x_p1_ff1;
reg [15:0] b1_x_p1_ff1;
reg [15:0] a2_x_p2_ff1;
reg [15:0] r2_x_p2_ff1;
reg [15:0] g2_x_p2_ff1;
reg [15:0] b2_x_p2_ff1;
reg [15:0] a3_x_p3_ff1;
reg [15:0] r3_x_p3_ff1;
reg [15:0] g3_x_p3_ff1;
reg [15:0] b3_x_p3_ff1;
reg [15:0] a4_x_p4_ff1;
reg [15:0] r4_x_p4_ff1;
reg [15:0] g4_x_p4_ff1;
reg [15:0] b4_x_p4_ff1;

// High for the texture types whose result is reported on alpha_out_en.
wire alpha_type = (tex_type == SIM_ALPHA8)  ||
                  (tex_type == SIM_ALPHA4)  ||
                  (tex_type == SIM_ALPHA2)  ||
                  (tex_type == SIM_ALPHA1)  ||
                  (tex_type == SIM_PUREMASK);

//*****************************
//stage 1 cal color x parameter
//*****************************

// texel 0 x parameter 1
mult_8x8 u_a1_x_p1(
    .a(texel_32_0[31:24]),
    .b(param1_in),
    .out(a1_x_p1)
);

mult_8x8 u_r1_x_p1(
    .a(texel_32_0[23:16]),
    .b(param1_in),
    .out(r1_x_p1)
);

mult_8x8 u_g1_x_p1(
    .a(texel_32_0[15:8]),
    .b(param1_in),
    .out(g1_x_p1)
);

mult_8x8 u_b1_x_p1(
    .a(texel_32_0[7:0]),
    .b(param1_in),
    .out(b1_x_p1)
);

// texel 1 x parameter 2
mult_8x8 u_a2_x_p2(
    .a(texel_32_1[31:24]),
    .b(param2_in),
    .out(a2_x_p2)
);

mult_8x8 u_r2_x_p2(
    .a(texel_32_1[23:16]),
    .b(param2_in),
    .out(r2_x_p2)
);

mult_8x8 u_g2_x_p2(
    .a(texel_32_1[15:8]),
    .b(param2_in),
    .out(g2_x_p2)
);

mult_8x8 u_b2_x_p2(
    .a(texel_32_1[7:0]),
    .b(param2_in),
    .out(b2_x_p2)
);

// texel 2 x parameter 3
mult_8x8 u_a3_x_p3(
    .a(texel_32_2[31:24]),
    .b(param3_in),
    .out(a3_x_p3)
);

mult_8x8 u_r3_x_p3(
    .a(texel_32_2[23:16]),
    .b(param3_in),
    .out(r3_x_p3)
);

mult_8x8 u_g3_x_p3(
    .a(texel_32_2[15:8]),
    .b(param3_in),
    .out(g3_x_p3)
);

mult_8x8 u_b3_x_p3(
    .a(texel_32_2[7:0]),
    .b(param3_in),
    .out(b3_x_p3)
);

// texel 3 x parameter 4
mult_8x8 u_a4_x_p4(
    .a(texel_32_3[31:24]),
    .b(param4_in),
    .out(a4_x_p4)
);

mult_8x8 u_r4_x_p4(
    .a(texel_32_3[23:16]),
    .b(param4_in),
    .out(r4_x_p4)
);

mult_8x8 u_g4_x_p4(
    .a(texel_32_3[15:8]),
    .b(param4_in),
    .out(g4_x_p4)
);

mult_8x8 u_b4_x_p4(
    .a(texel_32_3[7:0]),
    .b(param4_in),
    .out(b4_x_p4)
);

// Register the sixteen products together with the blend-enable flag.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        begin
            a1_x_p1_ff1     <= 16'b0;
            r1_x_p1_ff1     <= 16'b0;
            g1_x_p1_ff1     <= 16'b0;
            b1_x_p1_ff1     <= 16'b0;
            a2_x_p2_ff1     <= 16'b0;
            r2_x_p2_ff1     <= 16'b0;
            g2_x_p2_ff1     <= 16'b0;
            b2_x_p2_ff1     <= 16'b0;
            a3_x_p3_ff1     <= 16'b0;
            r3_x_p3_ff1     <= 16'b0;
            g3_x_p3_ff1     <= 16'b0;
            b3_x_p3_ff1     <= 16'b0;
            a4_x_p4_ff1     <= 16'b0;
            r4_x_p4_ff1     <= 16'b0;
            g4_x_p4_ff1     <= 16'b0;
            b4_x_p4_ff1     <= 16'b0;
            param_en_in_ff1 <= 1'b0;
        end
    else if(~busy_in)
        begin
            a1_x_p1_ff1     <= a1_x_p1;
            r1_x_p1_ff1     <= r1_x_p1;
            g1_x_p1_ff1     <= g1_x_p1;
            b1_x_p1_ff1     <= b1_x_p1;
            a2_x_p2_ff1     <= a2_x_p2;
            r2_x_p2_ff1     <= r2_x_p2;
            g2_x_p2_ff1     <= g2_x_p2;
            b2_x_p2_ff1     <= b2_x_p2;
            a3_x_p3_ff1     <= a3_x_p3;
            r3_x_p3_ff1     <= r3_x_p3;
            g3_x_p3_ff1     <= g3_x_p3;
            b3_x_p3_ff1     <= b3_x_p3;
            a4_x_p4_ff1     <= a4_x_p4;
            r4_x_p4_ff1     <= r4_x_p4;
            g4_x_p4_ff1     <= g4_x_p4;
            b4_x_p4_ff1     <= b4_x_p4;
            param_en_in_ff1 <= param_en_in;
        end
end

// Parameter FIFO read: pop one entry per accepted texel group.
always@(*)
begin
    param_fifo_rd = texel_32_en && ~busy_in;
end

// Texel 0, first delay (captured only when a valid texel is accepted).
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        texel_32_ff1 <= 32'b0;
    else if(texel_32_en && ~busy_in)
        texel_32_ff1 <= texel_32_0;
end

// Texel valid, first delay.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        texel_32_en_ff1 <= 1'b0;
    else if(~busy_in)
        texel_32_en_ff1 <= texel_32_en;
end

// Output valid, first delay: steer the valid flag to the alpha or the pixel
// strobe depending on the texture type.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        begin
            pixel_out_en_pre <= 1'b0;
            alpha_out_en_pre <= 1'b0;
        end
    else if(~busy_in)
        begin
            alpha_out_en_pre <= alpha_type ? texel_32_en : 1'b0;
            pixel_out_en_pre <= alpha_type ? 1'b0        : texel_32_en;
        end
end

//************************
//stage 2 sum 4 mul result
//************************

// Per-channel sum of the four weighted texels (truncated to 16 bits).
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        begin
            r_sum           <= 16'b0;
            g_sum           <= 16'b0;
            b_sum           <= 16'b0;
            a_sum           <= 16'b0;
            param_en_in_ff2 <= 1'b0;
        end
    else if(~busy_in)
        begin
            r_sum           <= r1_x_p1_ff1 + r2_x_p2_ff1 + r3_x_p3_ff1 + r4_x_p4_ff1;
            g_sum           <= g1_x_p1_ff1 + g2_x_p2_ff1 + g3_x_p3_ff1 + g4_x_p4_ff1;
            b_sum           <= b1_x_p1_ff1 + b2_x_p2_ff1 + b3_x_p3_ff1 + b4_x_p4_ff1;
            a_sum           <= a1_x_p1_ff1 + a2_x_p2_ff1 + a3_x_p3_ff1 + a4_x_p4_ff1;
            param_en_in_ff2 <= param_en_in_ff1;
        end
end

// Texel 0, second delay (lines up with the sums above).
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        texel_32_ff2 <= 32'b0;
    else if(texel_32_en_ff1 && ~busy_in)
        texel_32_ff2 <= texel_32_ff1;
end

//*******************
//stage3 final result
//*******************

// Output RGBA.
// Blend enabled : upper byte of each 16-bit sum (i.e. sum / 256).
// Blend disabled: delayed texel 0, using the same lane mapping as the
//                 multipliers (A=[31:24], R=[23:16], G=[15:8], B=[7:0]).
//                 The previous code drove R, G and B all from [15:8], which
//                 discarded the red and blue lanes of unfiltered texels.
always@(*)
begin
    if(param_en_in_ff2)
        begin
            r_out_gen = r_sum[15:8];
            g_out_gen = g_sum[15:8];
            b_out_gen = b_sum[15:8];
            a_out_gen = a_sum[15:8];
        end
    else
        begin
            r_out_gen = texel_32_ff2[23:16];
            g_out_gen = texel_32_ff2[15:8];
            b_out_gen = texel_32_ff2[7:0];
            a_out_gen = texel_32_ff2[31:24];
        end
end

// Output valid, second delay.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        begin
            pixel_out_en <= 1'b0;
            alpha_out_en <= 1'b0;
        end
    else if(~busy_in)
        begin
            pixel_out_en <= pixel_out_en_pre;
            alpha_out_en <= alpha_out_en_pre;
        end
end

// Side-band data is forwarded without registering.
always@(*)
begin
    x_out     = x_in    ;
    y_out     = y_in    ;
    z_out     = z_in    ;
    fog_x_out = fog_x_in;
    fog_y_out = fog_y_in;
    fog_z_out = fog_z_in;
    r_out     = r_in    ;
    g_out     = g_in    ;
    b_out     = b_in    ;
    a_out     = a_in    ;
end

// Latch uv_end until the end-of-generation pulse has been issued.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        uv_end_ff1 <= 1'b0;
    else if(pixel_gen_end)
        uv_end_ff1 <= 1'b0;
    else if(uv_end)
        uv_end_ff1 <= 1'b1;
end

// Single-cycle end pulse: fires once uv_end has been latched and the xy FIFO
// reports empty; self-clears on the following clock.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        pixel_gen_end <= 1'b0;
    else if(pixel_gen_end)
        pixel_gen_end <= 1'b0;
    else if(uv_end_ff1 && xy_fifo_empty)
        pixel_gen_end <= 1'b1;
    else
        pixel_gen_end <= 1'b0;
end

// XY FIFO read: pop the side-band entry whenever an output is presented and
// downstream is not stalling. Driven procedurally because xy_fifo_rd is
// declared as a reg; a continuous assign to a reg is not legal Verilog.
always@(*)
begin
    xy_fifo_rd = (pixel_out_en || alpha_out_en) && ~busy_in;
end

// Back-pressure is forwarded upstream unchanged.
assign busy_out = busy_in;

endmodule

